An Attempt to Impute the Data with KNN

Imputing the Data Using Multiple Imputation Through Mice

# Multiple imputation of `mi_2` with mice, run in parallel.
# make.formulas() / make.method() build mice's default per-variable
# imputation formulas and methods for `mi_2`; both are passed on unchanged.
formulas <- make.formulas(mi_2)

method <- make.method(mi_2)

# The former `m = 1` argument was dropped: when both `n.core` and
# `n.imp.core` are supplied, parlmice() replaces `m` with `n.imp.core` on
# every worker, so the pooled result holds
# n.core * n.imp.core = 3 * 2 = 6 imputations regardless of `m`.
# NOTE(review): recent mice releases deprecate parlmice() in favour of
# futuremice() -- confirm against the installed version. "FORK" clusters are
# not available on Windows.
mi_multiple_imp <- parlmice(mi_2,
  method = method,
  formulas = formulas,
  n.core = 3,
  cluster.seed = 12,
  n.imp.core = 2,
  cl.type = "FORK")

# Diagnostic plot of the imputation object.
plot(mi_multiple_imp)
# Long format: every imputation stacked, plus the original (unimputed) data
# because include = TRUE.
mi_2.5 <- complete(mi_multiple_imp, action = "long", include = TRUE)
# First completed data set only, without the original data.
mi_3 <- complete(mi_multiple_imp, action = 1, include = FALSE)
# Save the entire workspace to disk.
save.image("mi_mult_imp.RData")

EDA

# Redraw the diagnostic plot of the mice imputation object (same call as the
# one made right after the imputation step).
plot(mi_multiple_imp)

Building a Logistic Regression Model

Split the Data

# Model specification: logistic regression for classification on the glmnet
# engine, with a fixed penalty of 0.001 and mixture = 0 (pure ridge penalty).
log_reg_mod <- logistic_reg(penalty = 0.001, mixture = 0) %>%
  set_engine("glmnet") %>%
  set_mode("classification")

# Preprocessing: Complication is the outcome and Total_NSIDS. the only
# predictor; numeric predictors are KNN-imputed.
# NOTE(review): glmnet requires a predictor matrix with at least two columns,
# and KNN imputation has no other predictors to find neighbours with here --
# confirm whether the formula was meant to include more predictors.
log_reg_rec <- step_impute_knn(
  recipe(Complication ~ Total_NSIDS., data = mi_2),
  all_numeric_predictors()
)

# Bundle the model and the recipe into a single workflow.
log_reg_wkflow <- workflow() %>%
  add_model(log_reg_mod) %>%
  add_recipe(log_reg_rec)

Fitting the Logistic Regression Model

# Train the workflow (recipe + model) on mi_2.
log_reg_fit <- log_reg_wkflow %>%
  fit(data = mi_2)
# Print a tidy summary of the fitted workflow.
log_reg_fit %>%
  tidy()

Evaluating the Logistic Regression Model

# Append the model's predictions to mi_2. augment() has no `truth` argument,
# so the stray `truth = Complication` (silently absorbed by `...`) was removed.
# NOTE(review): mi_2 is also the data the model was fitted on, so the metrics
# below are in-sample and likely optimistic -- consider a held-out split.
model_aug <- augment(log_reg_fit, new_data = mi_2)

# Area under the ROC curve. Probability metrics take the class-probability
# column unnamed through `...`, not via `estimate =`.
# event_level = "second" treats the second factor level of Complication as the
# event; presumably that level is "1", matching .pred_1 -- TODO confirm.
auc_score <- roc_auc(model_aug, Complication, .pred_1,
                     event_level = "second")

# ROC curve plot, using the same truth / probability columns as the AUC.
autoplot(roc_curve(model_aug, Complication, .pred_1,
                   event_level = "second"))

# Save the entire workspace to disk.
save.image("SIBS_log_reg.Rdata")